# -------------------------------------------------------------------
# Purpose: Creates Table B22
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Fail fast if either project directory is missing: the first one is read
# from below, the second one receives the generated .tex file
stopifnot(
  dir.exists(pdataconfanalysis),
  dir.exists(poutputappendix)
)


# Load data
# The .RData file is expected to provide `surnameCountyLevel19001940`, the
# data set that every estimation in this script is run on
load(file = file.path(pdataconfanalysis, "surnameCountyLevel19001940.RData"))


# Regressions
# `o`: weighted least-squares models (Panel A of the table). The list holds
# ten fits: the five specifications below for the first outcome, followed by
# the same five for the second outcome. Everything is built inside local() so
# that only `o` is added to the workspace.
o <- local({
  # Outcomes, in column order
  outcomes <- c(
    "sum_patents_pc_1900_f_w",
    "sum_break_p80_rrfsim05_pc_1900_f_w"
  )

  # Regressors
  gdist_div <- "entropy_namelast_mp_relgendist_max_adjp_ws"
  plain_div <- "entropy_namelast_mp_adjp_ws"
  pop <- "sum_n_namelast_mp_adjp_ws"

  # Fixed-effect sets, from least to most demanding
  fe_base <- "gisjoin_1900^namelast_mp + statefip^year"
  fe_name_year <- "gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year"
  fe_trend <- paste(fe_name_year, "+ gisjoin_1900[year]")

  # Right-hand sides of the five specifications shared by both outcomes
  rhs <- c(
    paste(gdist_div, "|", fe_base),
    paste(gdist_div, "+", pop, "|", fe_base),
    paste(gdist_div, "+", pop, "|", fe_name_year),
    paste(gdist_div, "+", pop, "|", fe_trend),
    paste(plain_div, "+", gdist_div, "+", pop, "|", fe_trend)
  )

  # One formula per (outcome, specification) pair, outcome-major order
  fml_text <- unlist(lapply(outcomes, function(y) paste(y, "~", rhs)))

  # unname() is purely defensive: the result must stay an unnamed list
  unname(lapply(fml_text, function(txt) {
    feols(
      as.formula(txt),
      surnameCountyLevel19001940,
      weights = surnameCountyLevel19001940$wgt_name
    )
  }))
})

# `r`: weighted reduced-form models (Panel B of the table), i.e. the outcomes
# regressed directly on the iv_lo_* variables. The list holds ten fits: the
# five specifications below for the first outcome, followed by the same five
# for the second outcome. Built inside local() so only `r` is created.
r <- local({
  # Outcomes, in column order
  outcomes <- c(
    "sum_patents_pc_1900_f_w",
    "sum_break_p80_rrfsim05_pc_1900_f_w"
  )

  # Regressors (the iv_lo_* counterparts of the least-squares regressors)
  iv_gdist_div <- "iv_lo_entropy_namelast_mp_relgendist_max_adjp_fe_immig_ws"
  iv_plain_div <- "iv_lo_entropy_namelast_mp_adjp_fe_immig_ws"
  iv_pop <- "iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws"

  # Fixed-effect sets, from least to most demanding
  fe_base <- "gisjoin_1900^namelast_mp + statefip^year"
  fe_name_year <- "gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year"
  fe_trend <- paste(fe_name_year, "+ gisjoin_1900[year]")

  # Right-hand sides of the five specifications shared by both outcomes
  rhs <- c(
    paste(iv_gdist_div, "|", fe_base),
    paste(iv_gdist_div, "+", iv_pop, "|", fe_base),
    paste(iv_gdist_div, "+", iv_pop, "|", fe_name_year),
    paste(iv_gdist_div, "+", iv_pop, "|", fe_trend),
    paste(iv_plain_div, "+", iv_gdist_div, "+", iv_pop, "|", fe_trend)
  )

  # One formula per (outcome, specification) pair, outcome-major order
  fml_text <- unlist(lapply(outcomes, function(y) paste(y, "~", rhs)))

  # unname() is purely defensive: the result must stay an unnamed list
  unname(lapply(fml_text, function(txt) {
    feols(
      as.formula(txt),
      surnameCountyLevel19001940,
      weights = surnameCountyLevel19001940$wgt_name
    )
  }))
})

# `i`: weighted instrumental-variable models (Panels C and D of the table).
# Each formula has three parts: `outcome ~ 1 | fixed effects | endogenous ~
# instruments`. The list holds ten fits: the five specifications below for the
# first outcome, followed by the same five for the second outcome. Built
# inside local() so only `i` is created.
i <- local({
  # Outcomes, in column order
  outcomes <- c(
    "sum_patents_pc_1900_f_w",
    "sum_break_p80_rrfsim05_pc_1900_f_w"
  )

  # Endogenous regressors
  gdist_div <- "entropy_namelast_mp_relgendist_max_adjp_ws"
  plain_div <- "entropy_namelast_mp_adjp_ws"
  pop <- "sum_n_namelast_mp_adjp_ws"

  # Their instruments
  iv_gdist_div <- "iv_lo_entropy_namelast_mp_relgendist_max_adjp_fe_immig_ws"
  iv_plain_div <- "iv_lo_entropy_namelast_mp_adjp_fe_immig_ws"
  iv_pop <- "iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws"

  # Fixed-effect sets, from least to most demanding
  fe_base <- "gisjoin_1900^namelast_mp + statefip^year"
  fe_name_year <- "gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year"
  fe_trend <- paste(fe_name_year, "+ gisjoin_1900[year]")

  # First-stage blocks: "endogenous ~ instruments"
  iv_one <- paste(gdist_div, "~", iv_gdist_div)
  iv_two <- paste(
    gdist_div, "+", pop, "~",
    iv_gdist_div, "+", iv_pop
  )
  iv_three <- paste(
    plain_div, "+", gdist_div, "+", pop, "~",
    iv_plain_div, "+", iv_gdist_div, "+", iv_pop
  )

  # "fixed effects | first stage" for the five specifications
  tails <- c(
    paste(fe_base, "|", iv_one),
    paste(fe_base, "|", iv_two),
    paste(fe_name_year, "|", iv_two),
    paste(fe_trend, "|", iv_two),
    paste(fe_trend, "|", iv_three)
  )

  # One formula per (outcome, specification) pair, outcome-major order
  fml_text <- unlist(lapply(outcomes, function(y) paste(y, "~ 1 |", tails)))

  # unname() is purely defensive: the result must stay an unnamed list
  unname(lapply(fml_text, function(txt) {
    feols(
      as.formula(txt),
      surnameCountyLevel19001940,
      weights = surnameCountyLevel19001940$wgt_name
    )
  }))
})


# F-statistics
# The first-stage statistics shown in the IV panel (labelled
# "Sanderson-Windmeijer F-stat" there) are obtained by running Stata's
# ivreghdfe through the project helper get_fstat_from_stata().

# Data handed to Stata, with short aliases: x* = endogenous regressors,
# z* = their instruments, y* = outcomes; the remaining columns are passed
# through under their own names and are used in absorb()/cluster() below.
dstata <- surnameCountyLevel19001940[, list(
  x1 = entropy_namelast_mp_relgendist_max_adjp_ws,
  z1 = iv_lo_entropy_namelast_mp_relgendist_max_adjp_fe_immig_ws,
  x2 = entropy_namelast_mp_adjp_ws,
  z2 = iv_lo_entropy_namelast_mp_adjp_fe_immig_ws,
  x3 = sum_n_namelast_mp_adjp_ws,
  z3 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
  y1 = sum_patents_pc_1900_f_w,
  y2 = sum_break_p80_rrfsim05_pc_1900_f_w,
  gisjoin_1900_f,
  statefip_f,
  year_f,
  year_num,
  namelast_mp_f,
  wgt_name
)]

# One Stata snippet per IV specification. Each snippet runs ivreghdfe, copies
# the rounded "SWF" entries of e(first) into variables swf1, swf2, ..., and
# keeps only those variables and the first row. The strings are sent to Stata
# verbatim, so their content (including line breaks) must not be reformatted.
commands <- list(
  # Specification 1: x1 instrumented by z1
  'ivreghdfe y1 (x1= z1), absorb(gisjoin_1900_f#namelast_mp_f statefip_f#year_f) cluster(statefip_f) first ffirst savefirst
   gen swf1 = .
   replace swf1 = round(e(first)["SWF",1])
   keep swf*
   keep if _n == 1',
  # Specification 2: x1 and x3 instrumented by z1 and z3
  'ivreghdfe y1 (x1 x3= z1 z3), absorb(gisjoin_1900_f#namelast_mp_f statefip_f#year_f) cluster(statefip_f) first ffirst savefirst
   gen swf1 = .
   gen swf2 = .
   replace swf1 = round(e(first)["SWF",1])
   replace swf2 = round(e(first)["SWF",2])
   keep swf*
   keep if _n == 1',
  # Specification 3: adds namelast_mp_f#year_f to absorb()
  'ivreghdfe y1 (x1 x3= z1 z3), absorb(gisjoin_1900_f#namelast_mp_f statefip_f#year_f namelast_mp_f#year_f) cluster(statefip_f) first ffirst savefirst
   gen swf1 = .
   gen swf2 = .
   replace swf1 = round(e(first)["SWF",1])
   replace swf2 = round(e(first)["SWF",2])
   keep swf*
   keep if _n == 1',
  # Specification 4: adds c.year_num##gisjoin_1900_f to absorb()
  'ivreghdfe y1 (x1 x3= z1 z3), absorb(gisjoin_1900_f#namelast_mp_f statefip_f#year_f namelast_mp_f#year_f c.year_num##gisjoin_1900_f) cluster(statefip_f) first ffirst savefirst
   gen swf1 = .
   gen swf2 = .
   replace swf1 = round(e(first)["SWF",1])
   replace swf2 = round(e(first)["SWF",2])
   keep swf*
   keep if _n == 1',
  # Specification 5: x1, x2 and x3 instrumented by z1, z2 and z3
  'ivreghdfe y1 (x1 x2 x3= z1 z2 z3), absorb(gisjoin_1900_f#namelast_mp_f statefip_f#year_f namelast_mp_f#year_f c.year_num##gisjoin_1900_f) cluster(statefip_f) first ffirst savefirst
   gen swf1 = .
   gen swf2 = .
   gen swf3 = .
   replace swf1 = round(e(first)["SWF",1])
   replace swf2 = round(e(first)["SWF",2])
   replace swf3 = round(e(first)["SWF",3])
   keep swf*
   keep if _n == 1'
)

# Run every snippet and flatten the results into one character vector.
# NOTE(review): what get_fstat_from_stata() returns per snippet (one value or
# one per swf* variable) is not visible in this script - confirm that the
# flattened vector has one entry per IV specification.
swf_results <- lapply(commands, get_fstat_from_stata, data.in = dstata)
swf_results <- as.character(unlist(swf_results))

# All snippets use y1 only; the same statistics are repeated for the second
# set of columns.
# NOTE(review): this assumes the first stage is identical for both outcomes
# (same estimation sample) - TODO confirm.
swfstat <- c(swf_results, swf_results)


# Create table
# Weighted mean and standard deviation of one outcome, for the column headers.
#
# values  : numeric vector holding the outcome.
# weights : numeric vector of observation weights, same length as `values`.
#
# Observations with a missing outcome or a missing weight are dropped. The
# standard deviation is the weighted root mean squared deviation from the
# *unrounded* weighted mean; both statistics are rounded to two decimals only
# at the very end, so rounding error in the mean cannot leak into the sd.
#
# Returns a named numeric vector c(mean = ..., sd = ...).
weighted_mean_sd <- function(values, weights) {
  complete <- !is.na(values) & !is.na(weights)
  values <- values[complete]
  weights <- weights[complete]
  centre <- weighted.mean(values, weights)
  spread <- sqrt(weighted.mean((values - centre)^2, weights))
  round(c(mean = centre, sd = spread), 2)
}

y1_stats <- weighted_mean_sd(
  surnameCountyLevel19001940$sum_patents_pc_1900_f_w,
  surnameCountyLevel19001940$wgt_name
)
y1_mean <- y1_stats[["mean"]]
y1_sd <- y1_stats[["sd"]]

y2_stats <- weighted_mean_sd(
  surnameCountyLevel19001940$sum_break_p80_rrfsim05_pc_1900_f_w,
  surnameCountyLevel19001940$wgt_name
)
y2_mean <- y2_stats[["mean"]]
y2_sd <- y2_stats[["sd"]]

# LaTeX column headers for the two outcomes. The literal "mean =" is also used
# further down as an anchor when post-processing the .tex file, so keep it.
y1 <- paste0("\\makecell{Patents \\\\ per 1,000 people \\\\ (mean = ", y1_mean, ", sd = ", y1_sd, ")}")
y2 <- paste0("\\makecell{Breakthrough patents \\\\ per 1,000 people \\\\ (mean = ", y2_mean, ", sd = ", y2_sd, ")}")

# Register the display labels that etable() substitutes for raw variable
# names. `y1` and `y2` are the LaTeX header strings built just above.
setFixest_dict(
  c(
    # Regressors and their predicted (iv_lo_*) counterparts
    entropy_namelast_mp_adjp_ws =
      "Surname diversity",
    iv_lo_entropy_namelast_mp_adjp_fe_immig_ws =
      "Predicted surname diversity",
    sum_n_namelast_mp_adjp_ws =
      "Population",
    iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws =
      "Predicted population",
    entropy_namelast_mp_relgendist_max_adjp_ws =
      "Genetic distance weighted surname diversity",
    iv_lo_entropy_namelast_mp_relgendist_max_adjp_fe_immig_ws =
      "Predicted genetic distance weighted surname diversity",
    # Outcomes
    sum_patents_pc_1900_f_w = y1,
    sum_break_p80_rrfsim05_pc_1900_f_w = y2,
    # Fixed-effect dimensions
    year = "Period",
    statefip = "State",
    namelast_mp = "Surname",
    gisjoin_1900 = "County"
  )
)

# Panel A: export the least-squares models in `o` to tableB22.tex. This file
# is the base table; the rows of the other panels are inserted into it later.
tablename <- file.path(poutputappendix, "tableB22.tex")
# Standard errors are clustered by statefip and the only fit statistic shown
# is the number of observations.
etable(o,
  cluster = ~statefip,
  fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  file = tablename, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
# The calls below edit the .tex file in place through project helpers that are
# defined outside this script; how exactly they match their anchor strings is
# not visible here. Later calls rely on rows inserted by earlier ones, so the
# order of these statements matters.
# NOTE(review): presumably relabels the row produced by the gisjoin_1900[year]
# term - confirm against edit_table_content_fixed().
edit_table_content_fixed(tablename, "Period $\\times $ County", "County-specific linear trends")
# Panel A title row plus a rule spanning all 11 columns, anchored on "\midrule".
add_table_row(tablename, "\\midrule", "\\multicolumn{2}{l}{\\textit{Panel A: Least-squares estimates}} &  \\multicolumn{9}{c}{}\\\\ \\cmidrule(lr){1-11}")
# Rules under the two outcome headers (columns 2-6 and 7-11); "mean =" is part
# of the header text stored in y1/y2.
add_table_row(tablename, "mean =", "\\cmidrule(lr){2-6}  \\cmidrule(lr){7-11}")
# NOTE(review): presumably moves the "Observations" row next to the bottom
# rule - confirm against move_table_row().
move_table_row(tablename, "Observations", "bottomrule")
# Title rows for Panels B, C and D, separated by blank table rows. These
# titles are the anchors used when the panels' estimates are inserted later.
add_table_row(tablename, "    \\\\", c("\\multicolumn{2}{l}{\\textit{Panel B: Reduced-form estimates}} &  \\multicolumn{9}{c}{}\\\\", "\\\\", "\\multicolumn{2}{l}{\\textit{Panel C: Instrumental-variable estimates}} &  \\multicolumn{9}{c}{}\\\\", "\\\\", "\\multicolumn{2}{l}{\\textit{Panel D: First-stage estimates}} &  \\multicolumn{9}{c}{}\\\\"))

# Panel B: export the reduced-form models in `r` to a scratch file, pull out
# the estimate rows and insert them into the main table.
temptable <- file.path(poutputappendix, "temp.tex")
# NOTE(review): `order` takes case-sensitive regular expressions, while the
# label registered in setFixest_dict() reads "Genetic distance ..." with a
# lower-case "d" - confirm that "Distance" matches what it is meant to match.
# Changing the pattern would change the row order that later steps rely on.
etable(r,
  cluster = ~statefip,
  fitstat = ~n,
  order = c("Distance", "diversity", "Population"),
  digits = "r3", digits.stats = "r3",
  file = temptable, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
# get_estimates_rows() is a project helper defined outside this script;
# presumably it returns the coefficient/standard-error lines of the .tex file.
estimates_rows <- get_estimates_rows(temptable)
# Insert a full-width rule and the estimates, anchored on the "Panel B" title.
add_table_row(tablename, "Panel B", c("\\cmidrule(lr){1-11}", estimates_rows))

# Panel C: export the IV models in `i` (second stage) to the scratch file,
# pull out the estimate rows and insert them into the main table.
temptable <- file.path(poutputappendix, "temp.tex")
# `swfstat` (built in the F-statistics section) is added as an extra row.
# NOTE(review): `extralines` needs one entry per model (ten here) - confirm
# that `swfstat` has that length.
etable(i,
  cluster = ~statefip,
  fitstat = ~n,
  order = c("Distance", "diversity", "Population"),
  digits = "r3", digits.stats = "r3",
  extralines = list("Sanderson-Windmeijer \\textit{F}-stat" = swfstat),
  file = temptable, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
# get_estimates_rows() is a project helper defined outside this script;
# NOTE(review): the "Sanderson" anchor below implies the returned rows include
# the extra F-stat line - confirm.
estimates_rows <- get_estimates_rows(temptable)
# Insert a full-width rule and the estimates, anchored on the "Panel C" title.
add_table_row(tablename, "Panel C", c("\\cmidrule(lr){1-11}", estimates_rows))
# Blank table row placed relative to the F-stat line ("before" is passed as
# the position argument).
add_table_row(tablename, "Sanderson", "\\\\", "before")

# Panel D: export the first-stage regressions of the IV models in `i` to the
# scratch file and rearrange them to fit the 10-column layout of the table.
temptable <- file.path(poutputappendix, "temp.tex")
etable(i,
  stage = 1,
  cluster = ~statefip,
  order = c("Distance", "diversity", "Population"),
  fitstat = ~n, digits = "r3", digits.stats = "r3",
  file = temptable, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
estimates_rows <- get_estimates_rows(temptable)
# collapse_stage1() is a project helper defined outside this script.
# NOTE(review): the index vector has ten entries, presumably one per table
# column, each naming the first-stage column to show there, with NA leaving
# the cell empty - confirm against collapse_stage1().
estimates_rows1 <- collapse_stage1(estimates_rows, c(2, 3, 5, 7, 10, NA, 4, 6, 8, 11))
# Both insertions below are anchored on the "Panel D" title; the estimates go
# in first and the sub-header afterwards.
add_table_row(tablename, "Panel D", estimates_rows1)
# Sub-header naming the first-stage dependent variables of columns 2-6 and 7-11.
add_table_row(tablename, "Panel D", c("\\cmidrule(lr){1-11}", "& \\multicolumn{5}{c}{\\makecell{Genetic distance weighted \\\\ surname diversity}} & \\multicolumn{5}{c}{Population}\\\\", "\\cmidrule(lr){2-6}  \\cmidrule(lr){7-11}"))
# Second first-stage block: only the fifth position is filled, under a
# "Surname diversity" sub-header.
estimates_rows2 <- collapse_stage1(estimates_rows, c(NA, NA, NA, NA, 9, NA, NA, NA, NA, NA))
add_table_row(tablename, "County-Surname fixed", c("& \\multicolumn{5}{c}{Surname diversity}\\\\", "\\cmidrule(lr){2-6}", estimates_rows2, "\\\\"), "before")
# NOTE(review): presumably drops the "County fixed effects" indicator row -
# confirm against remove_table_row().
remove_table_row(tablename, "County fixed effects")
# Delete the scratch file used for Panels B-D.
file.remove(temptable)

cat("Table B22 saved to:", tablename, "\n")